Load Libraries

library(Seurat)
library(SeuratObject)
library(SingleCellExperiment)
library(scater)
library(scran)

Load Data

sobj_stromal = readRDS("/home/idies/workspace/SingleCellConsortium/cutsort/cofest-2024/data/singlecell/Final_scHTAN_colon_stromal_220213.rds")
sobj_stromal = UpdateSeuratObject(sobj_stromal)
Validating object structure
Updating object slots
Ensuring keys are in the proper structure
Ensuring feature names don't have underscores or pipes
Updating slots in RNA
Updating slots in pca
Updating slots in umap
Setting umap DimReduc to global
Setting assay used for NormalizeData.RNA to RNA
Setting assay used for FindVariableFeatures.RNA to RNA
Setting assay used for ScaleData.RNA to RNA
Setting assay used for RunPCA.RNA to RNA
Setting assay used for FindNeighbors.RNA.pca to RNA
Setting assay used for RunUMAP.RNA.pca to RNA
No assay information could be found for FindClusters
Warning: Adding a command log without an assay associated with itObject representation is consistent with the most current Seurat version
sce.stromal = as.SingleCellExperiment(sobj_stromal)
# saveRDS(sce.stromal, file = "sce.stromal.rds")

Explore Data

as.data.frame( colData(sce.stromal) )
plot1 = plotReducedDim(sce.stromal, "UMAP", colour_by="CellType", point_size = 0.5) +
  guides(color = guide_legend(override.aes = list(size=3)))
plot2 = plotReducedDim(sce.stromal, "UMAP", colour_by="DiseaseState", point_size = 0.5) +
  guides(color = guide_legend(override.aes = list(size=3)))
gridExtra::grid.arrange(plot1, plot2, nrow = 1, ncol = 2)

table(colData(sce.stromal)$DiseaseState)

Adenocarcinoma         Normal          Polyp     Unaffected 
          1117           8059           2684           2333 
table(colData(sce.stromal)$ident)

   0    1    2    3    4    5    6    7    8    9   10   11   12   13 
1630 1160 1134 1080 1069  891  734  704  703  611  609  560  482  438 
  14   15   16   17   18   19   20   21   22   23 
 408  375  317  302  298  203  154  152  114   65 
table(colData(sce.stromal)$CellTypeInitial)

Stromal 
  14193 
table(colData(sce.stromal)$CellType)

                    Adipocytes  Cancer Associated Fibroblasts 
                           152                            438 
           Crypt Fibroblasts 1            Crypt Fibroblasts 2 
                          1134                            609 
           Crypt Fibroblasts 3            Crypt Fibroblasts 4 
                           734                            763 
                   Endothelial                           Glia 
                          1491                            375 
   Lymphatic endothelial cells Myofibroblasts/Smooth Muscle 1 
                           317                           5391 
Myofibroblasts/Smooth Muscle 2 Myofibroblasts/Smooth Muscle 3 
                           482                            611 
                       Neurons                      Pericytes 
                            65                            408 
                       Unknown      Villus Fibroblasts WNT5B+ 
                           154                           1069 
unique(colData(sce.stromal)$CellType)
 [1] "Villus Fibroblasts WNT5B+"      "Pericytes"                     
 [3] "Endothelial"                    "Crypt Fibroblasts 1"           
 [5] "Crypt Fibroblasts 3"            "Myofibroblasts/Smooth Muscle 3"
 [7] "Crypt Fibroblasts 4"            "Lymphatic endothelial cells"   
 [9] "Adipocytes"                     "Glia"                          
[11] "Myofibroblasts/Smooth Muscle 2" "Myofibroblasts/Smooth Muscle 1"
[13] "Crypt Fibroblasts 2"            "Neurons"                       
[15] "Unknown"                        "Cancer Associated Fibroblasts" 
colnames(colData(sce.stromal))
 [1] "orig.ident"                                                                                     
 [2] "nCount_RNA"                                                                                     
 [3] "nFeature_RNA"                                                                                   
 [4] "percent.mt"                                                                                     
 [5] "SampleNameOnly"                                                                                 
 [6] "SimplifiedSampleName"                                                                           
 [7] "DifferentialGroup"                                                                              
 [8] "Donor"                                                                                          
 [9] "FAP"                                                                                            
[10] "DiseaseState"                                                                                   
[11] "Size"                                                                                           
[12] "Location"                                                                                       
[13] "Dissociation"                                                                                   
[14] "Polyp.Type"                                                                                     
[15] "Dysplasia"                                                                                      
[16] "HGD"                                                                                            
[17] "LGD"                                                                                            
[18] "Percent_HGD"                                                                                    
[19] "Percent_LGD"                                                                                    
[20] "Percent_sample_wo_neoplasia_overall"                                                            
[21] "Percent_sample_w_cancer_overall"                                                                
[22] "Percent_sample_w_any_degree_dysplasia_overall"                                                  
[23] "Pathologist0_PolypType"                                                                         
[24] "Pathologist0_HGD"                                                                               
[25] "Pathologist0_LGD"                                                                               
[26] "Pathologist0_Percent_of_neoplastic_cell_nuclei_as_a_total_of_all_cell_nuclei_in_tumor_ONLY_area"
[27] "Pathologist0_Percent_nondysplastic_stroma"                                                      
[28] "Pathologist0_Percent_Entire_Tissue_Dysplastic"                                                  
[29] "CellTypeInitial"                                                                                
[30] "CellType"                                                                                       
[31] "ident"                                                                                          
[32] "label"                                                                                          
colLabels(sce.stromal) <- as.factor(colData(sce.stromal)$CellType)
table(colData(sce.stromal)$label)

                    Adipocytes  Cancer Associated Fibroblasts 
                           152                            438 
           Crypt Fibroblasts 1            Crypt Fibroblasts 2 
                          1134                            609 
           Crypt Fibroblasts 3            Crypt Fibroblasts 4 
                           734                            763 
                   Endothelial                           Glia 
                          1491                            375 
   Lymphatic endothelial cells Myofibroblasts/Smooth Muscle 1 
                           317                           5391 
Myofibroblasts/Smooth Muscle 2 Myofibroblasts/Smooth Muscle 3 
                           482                            611 
                       Neurons                      Pericytes 
                            65                            408 
                       Unknown      Villus Fibroblasts WNT5B+ 
                           154                           1069 

Identifying Marker Genes Across Clusters

marker.info <- scoreMarkers(sce.stromal, groups = colData(sce.stromal)$label)
length(unique(colData(sce.stromal)$label))
[1] 16
print(names(marker.info))
 [1] "Adipocytes"                     "Cancer Associated Fibroblasts" 
 [3] "Crypt Fibroblasts 1"            "Crypt Fibroblasts 2"           
 [5] "Crypt Fibroblasts 3"            "Crypt Fibroblasts 4"           
 [7] "Endothelial"                    "Glia"                          
 [9] "Lymphatic endothelial cells"    "Myofibroblasts/Smooth Muscle 1"
[11] "Myofibroblasts/Smooth Muscle 2" "Myofibroblasts/Smooth Muscle 3"
[13] "Neurons"                        "Pericytes"                     
[15] "Unknown"                        "Villus Fibroblasts WNT5B+"     
stromal.markers = 
  lapply(marker.info, function(x){
  # colnames(x)
  cluster1 = as.data.frame(x)
  ordered.meanAUC <- cluster1[order(cluster1$mean.AUC, decreasing=TRUE),]
  ordered.meanlogFC.cohen <- cluster1[order(cluster1$mean.logFC.cohen, decreasing=TRUE),]
  # ordered.meanlogFC.det <- cluster1[order(cluster1$mean.logFC.detected, decreasing=TRUE),]
  markers = unique(c(rownames(ordered.meanAUC[1:10,c(10:14)]), 
                   rownames(ordered.meanlogFC.cohen[1:10,c(5:9)])))  
  markers
})

Inspect the plots visually for the genes/markers

plotExpression(sce.stromal, features=stromal.markers$`Myofibroblasts/Smooth Muscle 1`,
               x="label", colour_by="label") +
  theme(axis.text.x = element_text(angle = 70, vjust = 1, hjust=1))

plotExpression(sce.stromal, features=stromal.markers$`Myofibroblasts/Smooth Muscle 1`,
               x="DiseaseState", colour_by="DiseaseState") +
  theme(axis.text.x = element_text(angle = 70, vjust = 1, hjust=1))

plotUMAP(sce.stromal, colour_by="MYH11" )

Document software

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.1 LTS

Matrix products: default
BLAS:   /home/idies/R/lib/R/lib/libRblas.so 
LAPACK: /home/idies/R/lib/R/lib/libRlapack.so;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] scran_1.28.2                scater_1.28.0              
 [3] ggplot2_3.4.4               scuttle_1.10.3             
 [5] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
 [7] Biobase_2.60.0              GenomicRanges_1.52.1       
 [9] GenomeInfoDb_1.36.4         IRanges_2.34.1             
[11] S4Vectors_0.38.2            BiocGenerics_0.46.0        
[13] MatrixGenerics_1.12.3       matrixStats_1.0.0          
[15] SeuratObject_4.1.4          Seurat_4.3.0               

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.21          splines_4.3.1            
  [3] later_1.3.1               bitops_1.0-7             
  [5] tibble_3.2.1              polyclip_1.10-6          
  [7] lifecycle_1.0.3           edgeR_3.42.4             
  [9] globals_0.16.2            lattice_0.21-8           
 [11] MASS_7.3-60               magrittr_2.0.3           
 [13] limma_3.56.2              plotly_4.10.2            
 [15] sass_0.4.7                rmarkdown_2.25           
 [17] jquerylib_0.1.4           yaml_2.3.7               
 [19] metapod_1.8.0             httpuv_1.6.11            
 [21] sctransform_0.4.1         sp_2.1-1                 
 [23] spatstat.sparse_3.0-2     reticulate_1.34.0        
 [25] cowplot_1.1.1             pbapply_1.7-2            
 [27] RColorBrewer_1.1-3        abind_1.4-5              
 [29] zlibbioc_1.46.0           Rtsne_0.16               
 [31] purrr_1.0.2               RCurl_1.98-1.12          
 [33] GenomeInfoDbData_1.2.10   ggrepel_0.9.4            
 [35] irlba_2.3.5.1             listenv_0.9.0            
 [37] spatstat.utils_3.0-3      goftest_1.2-3            
 [39] spatstat.random_3.1-6     dqrng_0.3.1              
 [41] fitdistrplus_1.1-11       parallelly_1.36.0        
 [43] DelayedMatrixStats_1.22.6 leiden_0.4.3             
 [45] codetools_0.2-19          DelayedArray_0.26.7      
 [47] tidyselect_1.2.0          farver_2.1.1             
 [49] ScaledMatrix_1.8.1        viridis_0.6.4            
 [51] spatstat.explore_3.2-3    jsonlite_1.8.7           
 [53] BiocNeighbors_1.18.0      ellipsis_0.3.2           
 [55] progressr_0.14.0          ggridges_0.5.4           
 [57] survival_3.5-5            tools_4.3.1              
 [59] ica_1.0-3                 Rcpp_1.0.11              
 [61] glue_1.6.2                gridExtra_2.3            
 [63] xfun_0.40                 dplyr_1.1.3              
 [65] withr_2.5.1               fastmap_1.1.1            
 [67] bluster_1.10.0            fansi_1.0.5              
 [69] digest_0.6.33             rsvd_1.0.5               
 [71] R6_2.5.1                  mime_0.12                
 [73] colorspace_2.1-0          scattermore_1.2          
 [75] tensor_1.5                spatstat.data_3.0-1      
 [77] utf8_1.2.3                tidyr_1.3.0              
 [79] generics_0.1.3            data.table_1.14.8        
 [81] httr_1.4.7                htmlwidgets_1.6.2        
 [83] S4Arrays_1.0.6            uwot_0.1.16              
 [85] pkgconfig_2.0.3           gtable_0.3.4             
 [87] lmtest_0.9-40             XVector_0.40.0           
 [89] htmltools_0.5.6.1         scales_1.2.1             
 [91] png_0.1-8                 knitr_1.44               
 [93] rstudioapi_0.15.0         reshape2_1.4.4           
 [95] nlme_3.1-162              cachem_1.0.8             
 [97] zoo_1.8-12                stringr_1.5.0            
 [99] KernSmooth_2.23-21        parallel_4.3.1           
[101] miniUI_0.1.1.1            vipor_0.4.5              
[103] pillar_1.9.0              grid_4.3.1               
[105] vctrs_0.6.4               RANN_2.6.1               
[107] promises_1.2.1            BiocSingular_1.16.0      
[109] beachmat_2.16.0           xtable_1.8-4             
[111] cluster_2.1.4             beeswarm_0.4.0           
[113] evaluate_0.22             cli_3.6.1                
[115] locfit_1.5-9.8            compiler_4.3.1           
[117] rlang_1.1.1               crayon_1.5.2             
[119] future.apply_1.11.0       labeling_0.4.3           
[121] plyr_1.8.9                ggbeeswarm_0.7.2         
[123] stringi_1.7.12            viridisLite_0.4.2        
[125] deldir_1.0-9              BiocParallel_1.34.2      
[127] munsell_0.5.0             lazyeval_0.2.2           
[129] spatstat.geom_3.2-5       Matrix_1.6-1.1           
[131] patchwork_1.1.3           sparseMatrixStats_1.12.2 
[133] future_1.33.0             statmod_1.5.0            
[135] shiny_1.7.5.1             ROCR_1.0-11              
[137] igraph_1.5.1              bslib_0.5.1              
LS0tCnRpdGxlOiAiRXhwbG9yZSBQcmVDYW5jZXIgQWx0YXMgKFBDQSkgc2NSTkEtc2VxIERhdGEgKHN0cm9tYWwpIgphdXRob3I6ICJZdWJhIEJoYW5kYXJpIgpkYXRlOiAiMjAyMy0xMS0yMCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQotLS0KCiMgU3VtbWFyeQoKLSBodHRwczovL3B1Ym1lZC5nb3YvMzU3MjYwNjcKLSBodHRwczovL3d3dy5uY2JpLm5sbS5uaWguZ292L2dlby9xdWVyeS9hY2MuY2dpP2FjYz1HU0UyMDEzNDgKLSBodHRwczovL2h1bWFudHVtb3JhdGxhcy5vcmcvaHRhMTAKCiMgTG9hZCBMaWJyYXJpZXMKCmBgYHtyIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KFNldXJhdE9iamVjdCkKbGlicmFyeShTaW5nbGVDZWxsRXhwZXJpbWVudCkKbGlicmFyeShzY2F0ZXIpCmxpYnJhcnkoc2NyYW4pCmBgYAoKIyBMb2FkIERhdGEKCmBgYHtyfQpzb2JqX3N0cm9tYWwgPSByZWFkUkRTKCIvaG9tZS9pZGllcy93b3Jrc3BhY2UvU2luZ2xlQ2VsbENvbnNvcnRpdW0vY3V0c29ydC9jb2Zlc3QtMjAyNC9kYXRhL3NpbmdsZWNlbGwvRmluYWxfc2NIVEFOX2NvbG9uX3N0cm9tYWxfMjIwMjEzLnJkcyIpCnNvYmpfc3Ryb21hbCA9IFVwZGF0ZVNldXJhdE9iamVjdChzb2JqX3N0cm9tYWwpCnNjZS5zdHJvbWFsID0gYXMuU2luZ2xlQ2VsbEV4cGVyaW1lbnQoc29ial9zdHJvbWFsKQojIHNhdmVSRFMoc2NlLnN0cm9tYWwsIGZpbGUgPSAic2NlLnN0cm9tYWwucmRzIikKYGBgCgojIEV4cGxvcmUgRGF0YQoKYGBge3J9CmFzLmRhdGEuZnJhbWUoIGNvbERhdGEoc2NlLnN0cm9tYWwpICkKYGBgCgpgYGB7ciBmaWcud2lkdGg9MTB9CnBsb3QxID0gcGxvdFJlZHVjZWREaW0oc2NlLnN0cm9tYWwsICJVTUFQIiwgY29sb3VyX2J5PSJDZWxsVHlwZSIsIHBvaW50X3NpemUgPSAwLjUpICsKICBndWlkZXMoY29sb3IgPSBndWlkZV9sZWdlbmQob3ZlcnJpZGUuYWVzID0gbGlzdChzaXplPTMpKSkKcGxvdDIgPSBwbG90UmVkdWNlZERpbShzY2Uuc3Ryb21hbCwgIlVNQVAiLCBjb2xvdXJfYnk9IkRpc2Vhc2VTdGF0ZSIsIHBvaW50X3NpemUgPSAwLjUpICsKICBndWlkZXMoY29sb3IgPSBndWlkZV9sZWdlbmQob3ZlcnJpZGUuYWVzID0gbGlzdChzaXplPTMpKSkKZ3JpZEV4dHJhOjpncmlkLmFycmFuZ2UocGxvdDEsIHBsb3QyLCBucm93ID0gMSwgbmNvbCA9IDIpCmBgYAoKYGBge3J9CnRhYmxlKGNvbERhdGEoc2NlLnN0cm9tYWwpJERpc2Vhc2VTdGF0ZSkKdGFibGUoY29sRGF0YShzY2Uuc3Ryb21hbCkkaWRlbnQpCnRhYmxlKGNvbERhdGEoc2NlLnN0cm9tYWwpJENlbGxUeXBlSW5pdGlhbCkKdGFibGUoY29sRGF0YShzY2Uuc3Ryb21hbCkkQ2VsbFR5cGUpCnVuaXF1ZShjb2xEYXRhKHNjZS5zdHJvbWFsKSRDZWxsVHlwZSkKY29sbmFtZXMoY29sRGF0YShzY2Uuc3Ryb21hbCkpCgpjb2xMYWJlbHMoc2NlLnN0cm9tYWwpIDwtIGFzLmZhY3Rvcihjb2xEYXRhKHNjZS5zdHJvbWFsKSRDZWxsVHlwZSkKdGFibGUoY29sRGF0YShzY2Uuc3Ryb21hbCkkbGFiZWwpCmBgYAoKIyBJZGVudGlmeWluZyBNYXJrZXIgR2VuZXMgQWNyb3NzIENsdXN0ZXJzCgpgYGB7cn0KbWFya2VyLmluZm8gPC0gc2NvcmVNYXJrZXJzKHNjZS5zdHJvbWFsLCBncm91cHMgPSBjb2xEYXRhKHNjZS5zdHJvbWFsKSRsYWJlbCkKbGVuZ3RoKHVuaXF1ZShjb2xEYXRhKHNjZS5zdHJvbWFsKSRsYWJlbCkpCnByaW50KG5hbWVzKG1hcmtlci5pbmZvKSkKYGBgCgpgYGB7cn0Kc3Ryb21hbC5tYXJrZXJzID0gCiAgbGFwcGx5KG1hcmtlci5pbmZvLCBmdW5jdGlvbih4KXsKICAjIGNvbG5hbWVzKHgpCiAgY2x1c3RlcjEgPSBhcy5kYXRhLmZyYW1lKHgpCiAgb3JkZXJlZC5tZWFuQVVDIDwtIGNsdXN0ZXIxW29yZGVyKGNsdXN0ZXIxJG1lYW4uQVVDLCBkZWNyZWFzaW5nPVRSVUUpLF0KICBvcmRlcmVkLm1lYW5sb2dGQy5jb2hlbiA8LSBjbHVzdGVyMVtvcmRlcihjbHVzdGVyMSRtZWFuLmxvZ0ZDLmNvaGVuLCBkZWNyZWFzaW5nPVRSVUUpLF0KICAjIG9yZGVyZWQubWVhbmxvZ0ZDLmRldCA8LSBjbHVzdGVyMVtvcmRlcihjbHVzdGVyMSRtZWFuLmxvZ0ZDLmRldGVjdGVkLCBkZWNyZWFzaW5nPVRSVUUpLF0KICBtYXJrZXJzID0gdW5pcXVlKGMocm93bmFtZXMob3JkZXJlZC5tZWFuQVVDWzE6MTAsYygxMDoxNCldKSwgCiAgICAgICAgICAgICAgICAgICByb3duYW1lcyhvcmRlcmVkLm1lYW5sb2dGQy5jb2hlblsxOjEwLGMoNTo5KV0pKSkgIAogIG1hcmtlcnMKfSkKYGBgCgojIEluc3BlY3QgdGhlIHBsb3RzIHZpc3VhbGx5IGZvciB0aGUgZ2VuZXMvbWFya2VycwoKYGBge3IgZmlnLndpZHRoPTEwfQpwbG90RXhwcmVzc2lvbihzY2Uuc3Ryb21hbCwgZmVhdHVyZXM9c3Ryb21hbC5tYXJrZXJzJGBNeW9maWJyb2JsYXN0cy9TbW9vdGggTXVzY2xlIDFgLAogICAgICAgICAgICAgICB4PSJsYWJlbCIsIGNvbG91cl9ieT0ibGFiZWwiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA3MCwgdmp1c3QgPSAxLCBoanVzdD0xKSkKYGBgCgpgYGB7ciBmaWcud2lkdGg9MTB9CnBsb3RFeHByZXNzaW9uKHNjZS5zdHJvbWFsLCBmZWF0dXJlcz1zdHJvbWFsLm1hcmtlcnMkYE15b2ZpYnJvYmxhc3RzL1Ntb290aCBNdXNjbGUgMWAsCiAgICAgICAgICAgICAgIHg9IkRpc2Vhc2VTdGF0ZSIsIGNvbG91cl9ieT0iRGlzZWFzZVN0YXRlIikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNzAsIHZqdXN0ID0gMSwgaGp1c3Q9MSkpCmBgYAoKYGBge3IgZmlnLndpZHRoPTEwfQpwbG90VU1BUChzY2Uuc3Ryb21hbCwgY29sb3VyX2J5PSJNWUgxMSIgKQpgYGAKCiMgRG9jdW1lbnQgc29mdHdhcmUKCmBgYHtyfQpzZXNzaW9uSW5mbygpCmBgYAoK